library(npiv)
library(tidyverse)
library(rstatix)
library(gridExtra)
library(psych)

###############################################################################
#THRESHOLD COMPUTATION
#for threshold computation please see the following paper 
#Yu, P., & Phillips, P.C.B. (2018). “Threshold regression with endogeneity”. Journal of Econometrics, 203, 50–68.
#contact person for code: pingyu@hku.hk

###############################################################################
#ENGEL CURVES AND ELASTICITY ESTIMATION

categories <- list(
  logcat = list(datR$logexpA, datR$logexpHT, datR$logexpC, datR$logexpClo, datR$logexpHPC, datR$logexpComm, datR$logexpT, datR$logexpL, datR$logexpIstr),  
  name = c("Food", "AlcTob", "Housing", "Clothes", "Health", "Communication", "Transports", "Leisure", "Education")  
)

for (i in seq_along(categories$logcat)) {
  
  logexp_vec <- categories$logcat[[i]]
  nome_cat <- categories$name[i]
  data<-datR
  data$logexpcat <- logexp_vec
  data <-data[!is.na(data$logexpcat),]
  data <- data[order(data$exp),] 
  
  #Engel Curve 
  npEC<-npiv(logexpcat~logexp|from_sp8, data=data)
  data$h<-npEC$h
  data$h.upper<-npEC$h.upper
  data$h.lower<-npEC$h.lower
  assign(paste0("npEC_", nome_cat), npEC)
  
  npEC <- get(paste0("npEC_", nome_cat))
  plot_nomecat<- ggplot(data = data, aes(x = logexp, y = logexpcat)) +
    geom_point(color = "grey79", size = 0.3) +
    geom_line(aes(y = h), color = "grey28", size = 1) +
    geom_line(aes(y = h.upper), color = "grey28", linetype = "dashed", size = 0.5) +
    geom_line(aes(y = h.lower), color = "grey28", linetype = "dashed", size = 0.5) +
    scale_x_continuous(limits = c(5.5, 9.5)) +
    scale_y_continuous(limits = c(0, 9)) +
    ggtitle(nome_cat) +
    labs(x = "log(Total Exp.)", y = "log(Exp.)") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(title =element_text(size=12), axis.text=element_text(size=8),axis.title=element_text(size=10))+theme_classic() 
  
  assign(paste0("EC_", nome_cat), plot_nomecat)
  
  #Overall model
  reg<-lm(logexpcat~as.factor(gender)+as.factor(risk)+as.factor(marital)+as.factor(drip3)+as.factor(d5edu)+lmidp, data=data)
  data$res<-reg$residuals
  np<-npiv(res~logexp|from_sp8, data = data)
  assign(paste0("np_", nome_cat), np)
  np <- get(paste0("np_", nome_cat))
  
  #Happy-Unhappy model
  thresh_value <- 3.8 #this value is estimated using IDKE (Yu and Phillips, 2018)
  unhappy <- subset(data, data$Factor <= thresh_value)
  happy <- subset(data, data$Factor > thresh_value)
  
  reghappy<-lm(logexpcat~as.factor(gender)+as.factor(risk)+as.factor(marital)+as.factor(drip3)+as.factor(d5edu)+lmidp, data=happy)
  happy$reshappy<-reghappy$residuals
  nphappy<-npiv(reshappy~logexp|from_sp8, data=happy)
  
  regunhappy<-lm(logexpcat~as.factor(gender)+as.factor(risk)+as.factor(marital)+as.factor(drip3)+as.factor(d5edu)+lmidp, data=unhappy)
  unhappy$resunhappy<-regunhappy$residuals
  npunhappy<-npiv(resunhappy~logexp|from_sp8, data=unhappy)
  
  assign(paste0("nphappy_", nome_cat), nphappy)
  nphappy <- get(paste0("nphappy_", nome_cat))
  assign(paste0("npunhappy_", nome_cat), npunhappy)
  npunhappy <- get(paste0("npunhappy_", nome_cat))
  
  #Mean elasticities and equality test
  cat("\nExp. Category:", nome_cat)
  cat("\nMean Elasticity - Overall:", mean(np$deriv, na.rm = TRUE))
  cat("\nMean Elasticity - Happy:", mean(nphappy$deriv, na.rm = TRUE))
  cat("\nMean Elasticity - Unhappy:", mean(npunhappy$deriv, na.rm = TRUE))
  cat("\nTest Happy=Unhappy")
  print(t.test(nphappy$deriv, npunhappy$deriv))
  
  #Quantiles
  q1 <- quantile(datR$exp, probs = 0.1, na.rm = TRUE)
  q9 <- quantile(datR$exp, probs = 0.9, na.rm = TRUE)
  
  #Mean elasticities by quantiles: Happy
  cat("\nMean Elasticity - Happy, Q1:", mean(nphappy$deriv[happy$exp <= q1], na.rm = TRUE))
  cat("\nMean Elasticity - Happy, Q2:", mean(nphappy$deriv[happy$exp > q1 & happy$exp <= q9], na.rm = TRUE))
  cat("\nMean Elasticity - Happy, Q3:", mean(nphappy$deriv[happy$exp > q9], na.rm = TRUE))
  
  #Mean elasticities by quantiles: Unhappy
  cat("\nMean Elasticity - Unhappy, Q1:", mean(npunhappy$deriv[unhappy$exp <= q1], na.rm = TRUE))
  cat("\nMean Elasticity - Unhappy, Q2:", mean(npunhappy$deriv[unhappy$exp > q1 & unhappy$exp <= q9], na.rm = TRUE))
  cat("\nMean Elasticity - Unhappy, Q3:", mean(npunhappy$deriv[unhappy$exp > q9], na.rm = TRUE))
  
  #Tests for equality of mean elasticities of Happy and Unhappy by quantiles
  cat("\nTest Happy=Unhappy, Q1")
  print(t.test(nphappy$deriv[happy$exp <= q1], npunhappy$deriv[unhappy$exp <= q1]))
  
  cat("\nTest Happy=Unhappy, Q2")
  print(t.test(nphappy$deriv[happy$exp > q1 & happy$exp <= q9],
               npunhappy$deriv[unhappy$exp > q1 & unhappy$exp <= q9]))
  
  cat("\nTest Happy=Unhappy, Q3")
  print(t.test(nphappy$deriv[happy$exp > q9 & happy$exp < 4500],
               npunhappy$deriv[unhappy$exp > q9 & unhappy$exp < 4500]))
  
  #Test for equality of mean elasticities across the different quantiles: Happy group  
  happy$ela<-nphappy$deriv
  happy$psp<-1
  happy$psp[happy$exp>q1 & happy$exp<=q9]<-2
  happy$psp[happy$exp>q9]<-3
  
  happy %>% group_by(psp) %>% get_summary_stats(ela, type = "mean_sd")
  res.aov <- happy %>% anova_test(ela ~ psp)
  print(res.aov)
  
  #Test for equality of mean elasticities across the different quantiles: Unhappy group  
  unhappy$ela<-npunhappy$deriv
  unhappy$psp<-1
  unhappy$psp[unhappy$exp>q1 & unhappy$exp<=q9]<-2
  unhappy$psp[unhappy$exp>q9]<-3
  
  unhappy %>% group_by(psp) %>% get_summary_stats(ela, type = "mean_sd")
  res.aov <- unhappy %>% anova_test(ela ~ psp)
  print(res.aov)
  
  #Elasticity curves  
  unhappy$ela<-npunhappy$deriv
  unhappy$ela_sup<-npunhappy$deriv+1.64*npunhappy$deriv.asy.se/sqrt(nrow(unhappy))
  unhappy$ela_inf<-npunhappy$deriv-1.64*npunhappy$deriv.asy.se/sqrt(nrow(unhappy))
  happy$ela<-nphappy$deriv
  happy$ela_sup<-nphappy$deriv+1.64*nphappy$deriv.asy.se/sqrt(nrow(happy))
  happy$ela_inf<-nphappy$deriv-1.64*nphappy$deriv.asy.se/sqrt(nrow(happy))
  phappy<-subset(happy,exp>400 & exp<4500)
  punhappy<-subset(unhappy,exp>400 & exp<4500)
  
  plot2_nomecat<-ggplot(phappy, aes(x=exp, y=ela)) +  geom_smooth(se=FALSE, color = "grey79")+  geom_smooth(data = punhappy, color = "grey28", se=FALSE)+
    geom_smooth(data = phappy, aes(x=exp, y=ela_inf), se=FALSE, color = "grey79", lwd=0.2, linetype="dashed") +
    geom_smooth(data = phappy, aes(x=exp, y=ela_sup), se=FALSE, color = "grey79", lwd=0.2, linetype="dashed") +
    geom_smooth(data = punhappy, aes(x=exp, y=ela_inf), se=FALSE, color = "grey28", lwd=0.2, linetype="dashed") +
    geom_smooth(data = punhappy, aes(x=exp, y=ela_sup), se=FALSE, color = "grey28", lwd=0.2, linetype="dashed") +
    ggtitle(nome_cat) + xlab("Total expenditure") + ylab("Elasticity") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(title =element_text(size=12), axis.text=element_text(size=8),axis.title=element_text(size=10))+theme_classic()+scale_y_continuous(limits = c(0, 2)) 
  
  assign(paste0("ElaPlot_", nome_cat), plot2_nomecat)
  
}  

###############################################################################
#FIGURE D1 in APPENDIX
grid.arrange(EC_Food, EC_AlcTob, EC_Clothes, EC_Housing, EC_Health, 
             EC_Communication, EC_Transports, EC_Education, EC_Leisure, ncol = 3)

###############################################################################
#FIGURE 1
grid.arrange(ElaPlot_Food, ElaPlot_AlcTob, ElaPlot_Clothes, ElaPlot_Housing, ElaPlot_Health, 
             ElaPlot_Communication, ElaPlot_Transports, ElaPlot_Education, ElaPlot_Leisure, ncol = 3)


